from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, IntegerType, StringType

if __name__ == '__main__':
    # Build the SparkSession object.
    # BUG FIX: the original chained .appName("local[*]"), which set the
    # application *name* to the string "local[*]".  That value is a master
    # URL and belongs in .master(); the app gets a real name instead.
    spark = SparkSession.builder. \
        master("local[*]"). \
        appName("read_csv_example"). \
        config("spark.sql.shuffle.partitions", "4"). \
        getOrCreate()
    # master       sets the cluster master URL (local[*] = all local cores)
    # appName      sets the application name shown in the Spark UI
    # config       sets arbitrary Spark properties
    # getOrCreate  returns an existing session or creates a new one

    # Obtain the SparkContext from the SparkSession
    sc = spark.sparkContext

    df = spark.read.format("csv") \
        .option("sep", ";") \
        .option("header", True) \
        .option("encoding", "utf-8") \
        .load("../../data/sql/people.csv")
    # option("sep", ";")        : field delimiter of the CSV file
    # option("header", True)    : first line is a header row
    # option("encoding", "utf-8"): character encoding of the file

    df.printSchema()
    df.show(truncate=False)

    # Expected output:
    # +-----+----+---------+
    # |name |age |job      |
    # +-----+----+---------+
    # |Jorge|30  |Developer|
    # |Bob  |32  |Developer|
    # |Ani  |11  |Developer|
    # |Lily |11  |Manager  |
    # |Put  |11  |Developer|
    # |Alice|9   |Manager  |
    # |Alice|9   |Manager  |
    # |Alice|9   |Manager  |
    # |Alice|9   |Manager  |
    # |Alice|null|Manager  |
    # |Alice|9   |null     |
    # +-----+----+---------+

    # Release cluster resources when done (the original leaked the session).
    spark.stop()
